########################
# 2. Multi-verse data collection
########################

load('analysis.RData')

library(lme4)
library(broom.mixed)
library(broom)
library(sandwich)
library(lmtest)
library(parallel)
library(pbapply)

analysis$party_place <- ifelse(analysis$party == 'Democrat', analysis$dem_place, analysis$gop_place) # the DV - ha!
analysis$same_party <- ifelse(analysis$party == analysis$R_party, 1, 0)
analysis$year1 <- analysis$year - 1990

# Define parameters
std_prefixes <- c("std_", "std_party_")
fed_ideologies <- c("nominate", "cf", "ada")
state_ideologies <- c("npat", "cf")
elec_levels <- c("ntl_", "state_")
elec_types <- c("symbolic", "operational")
temporal_controls <- c("none", "trend")
demographics <- c(FALSE, TRUE)
model_options <- c("random_effect", "none", "state", "year", "both")
n_options <- c(10, 30)

# Parallel setup
n_cores <- detectCores() - 1
cl <- makeCluster(n_cores)

process_combination <- function(params) {
  std_prefix <- params$std_prefix
  fed_ideology <- params$fed_ideology
  state_ideology <- params$state_ideology
  elec_level <- params$elec_level
  elec_type <- params$elec_type
  temporal_control <- params$temporal_control
  dem <- params$dem
  model_option <- params$model_option
  n_threshold <- params$n_threshold
  
  # Subset data
  data_subset <- analysis
  if (elec_level == "state_" && !is.na(n_threshold)) {
    data_subset <- data_subset[data_subset$n >= n_threshold, ]
  }
  
  # Build formula
  fed_var <- ifelse(fed_ideology == "cf", paste0(std_prefix, "fed_cf"), 
                    ifelse(fed_ideology == 'nominate', paste0(std_prefix, "nominate"), paste0(std_prefix, 'ada')))
  state_var <- ifelse(state_ideology == "cf", paste0(std_prefix, "state_cf"), paste0(std_prefix, "state_npat"))
  elec_var <- paste0(std_prefix, elec_level, elec_type)
  
  base_formula <- paste0(
    "party_place ~ ", fed_var, " + ", state_var, " + ", elec_var
  )
  
  interaction_terms <- " + self_place*same_party + same_party*party + party*st_knowledge"
  if (model_option == "state") {
    interaction_terms <- paste0(interaction_terms, " + state")
  } else if (model_option == "year") {
    interaction_terms <- paste0(interaction_terms, " + year")
  } else if (model_option == "both") {
    interaction_terms <- paste0(interaction_terms, " + state + year")
  }
  formula <- paste0(base_formula, interaction_terms)
  
  if (temporal_control == "trend") {
    formula <- paste0(formula, " + year1*party")
  }
  
  if (dem) {
    formula <- paste0(formula, " + nonwhite + bach + male + over50")
  }
  
  # Run the model
  if (model_option == "random_effect") {
    model <- lmer(as.formula(paste0(formula, " + (1 | state:year:party)")), data = data_subset, weights = adjusted_weight)
    model_summary <- tidy(model, effects = "fixed", conf.int = TRUE)
  } else {
    model <- lm(as.formula(formula), data = data_subset, weights = adjusted_weight)
    model_summary <- tidy(model)
    cluster_se <- vcovCL(model, cluster = ~case_id1)
    cluster_se_summary <- sqrt(diag(cluster_se))
    model_summary <- model_summary %>%
      mutate(cluster_robust_se = cluster_se_summary[term])
  }
  
  # Add additional multidata
  model_summary <- model_summary %>% mutate(
    std_prefix = std_prefix,
    fed_ideology = fed_ideology,
    state_ideology = state_ideology,
    elec_level = elec_level,
    elec_type = elec_type,
    temporal_control = temporal_control,
    demographics = dem,
    model_option = model_option,
    n_threshold = if (!is.na(n_threshold)) n_threshold else NA
  )
  
  return(model_summary)
}


# Create the grid for `elec_level != "state_"`
param_grid_non_state <- expand.grid(
  std_prefix = std_prefixes,
  fed_ideology = fed_ideologies,
  state_ideology = state_ideologies,
  elec_level = setdiff(elec_levels, "state_"),
  elec_type = elec_types,
  temporal_control = temporal_controls,
  dem = demographics,
  model_option = model_options,
  n_threshold = NA
)

# Create the grid for `elec_level == "state_"`
param_grid_state <- expand.grid(
  std_prefix = std_prefixes,
  fed_ideology = fed_ideologies,
  state_ideology = state_ideologies,
  elec_level = "state_",
  elec_type = elec_types,
  temporal_control = temporal_controls,
  dem = demographics,
  model_option = model_options,
  n_threshold = n_options
)

# Combine the two grids
param_grid <- rbind(param_grid_non_state, param_grid_state)

# Export to parallelization
clusterExport(cl, c("analysis", "std_prefixes", "fed_ideologies", "state_ideologies",
                    "elec_levels", "elec_types", "temporal_controls", "demographics", 
                    "model_options", "n_options", "process_combination", "param_grid"))
clusterEvalQ(cl, {
  library(lme4)
  library(broom.mixed)
  library(broom)
  library(sandwich)
  library(lmtest)
  library(dplyr)
})

# Run models in parallel
results <- pblapply(seq_len(nrow(param_grid)), function(i) {
  process_combination(param_grid[i, ])
}, cl = cl) # 4 hours, 21 minutes

# Combine the test results
df_results <- do.call(bind_rows, results)

rm(list=setdiff(ls(), c('df_results','results','param_grid')))
save.image('multiverse.RData')
